{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import util\n",
    "import os\n",
    "import math\n",
    "import subprocess\n",
    "import numpy as np\n",
    "from tensorflow.keras.datasets import imdb\n",
    "(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train[0][:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\" this film was just brilliant casting � � story direction � really � the part they played and you could just imagine being there robert � is an amazing actor and now the same being director � father came from the same � � as myself so i loved the fact there was a real � with this film the � � throughout the film were great it was just brilliant so much that i � the film as soon as it was released for � and would recommend it to everyone to watch and the � � was amazing really � at the end it was so sad and you know what they say if you � at a film it must have been good and this definitely was also � to the two little � that played the � of � and paul they were just brilliant children are often left out of the � � i think because the stars that play them all � up are such a big � for the whole film but these children are amazing and should be � for what they have done don't you think the whole story was so � because it was true and was � life after all that was � with us all\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "util.decode(X_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading glove embeddings...\n"
     ]
    }
   ],
   "source": [
    "# Load embeddings\n",
    "if not os.path.exists(\"glove.6B.100d.txt\"):\n",
    "    print(\"Downloading glove embeddings...\")\n",
    "    subprocess.check_output(\n",
    "        \"curl -OL http://nlp.stanford.edu/data/glove.6B.zip && unzip glove.6B.zip\", shell=True)\n",
    "embeddings_index = dict()\n",
    "f = open('glove.6B.100d.txt')\n",
    "print(\"Loading globe embeddings...\")\n",
    "for line in f:\n",
    "    values = line.split()\n",
    "    word = values[0]\n",
    "    coefs = np.asarray(values[1:], dtype='float32')\n",
    "    embeddings_index[word] = coefs\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.21388251764217375"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def cosine_sim(v1,v2):\n",
    "    \"compute cosine similarity of v1 to v2: (v1 dot v2)/{||v1||*||v2||)\"\n",
    "    sumxx, sumxy, sumyy = 0, 0, 0\n",
    "    for i in range(len(v1)):\n",
    "        x = v1[i]; y = v2[i]\n",
    "        sumxx += x*x\n",
    "        sumyy += y*y\n",
    "        sumxy += x*y\n",
    "    return sumxy/math.sqrt(sumxx*sumyy)\n",
    "film = embeddings_index[\"film\"]\n",
    "movie = embeddings_index[\"movie\"]\n",
    "book = embeddings_index[\"book\"]\n",
    "car = embeddings_index[\"car\"]\n",
    "truck = embeddings_index[\"truck\"]\n",
    "plane = embeddings_index[\"plane\"]\n",
    "cosine_sim(film, truck)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1.9744e-01,  4.4831e-01,  1.3689e-01, -1.5595e-01,  9.3600e-01,\n",
       "        7.2986e-01,  3.4099e-01, -3.3896e-01, -8.9569e-02, -4.7706e-01,\n",
       "        3.5112e-01, -4.2198e-01, -1.2221e-01, -6.3375e-02, -4.5820e-01,\n",
       "        7.8723e-01,  9.4045e-01,  8.1101e-02, -2.3224e-01,  4.0778e-01,\n",
       "        3.3258e-01, -4.4458e-01, -4.7117e-01,  1.4852e-01,  9.6308e-01,\n",
       "       -6.5267e-02, -5.3661e-02, -6.7474e-01, -4.2364e-01,  9.4392e-02,\n",
       "       -3.8668e-01,  1.8237e-01, -1.2846e-01, -2.1952e-01, -5.8993e-01,\n",
       "        7.3602e-01, -2.4009e-01,  3.2392e-01, -2.4663e-01, -4.0684e-01,\n",
       "       -5.2468e-01,  4.6174e-01, -1.4936e-01, -1.1999e-01, -1.3990e-01,\n",
       "       -4.4944e-01, -2.6565e-01, -7.0061e-01,  3.0188e-01, -1.1209e-01,\n",
       "        6.6323e-01,  3.9698e-01,  6.9158e-01,  8.3442e-01, -5.2717e-01,\n",
       "       -2.5314e+00,  1.3281e-01,  3.0253e-01,  1.1062e+00,  7.2221e-03,\n",
       "        2.6031e-01,  1.1584e+00, -7.9330e-02, -7.6659e-01,  1.2623e+00,\n",
       "       -6.2071e-01,  5.9821e-01,  7.3539e-01,  3.8573e-01, -4.0293e-01,\n",
       "       -3.1440e-02,  7.7863e-01,  3.1525e-01,  1.9003e-01, -6.5821e-01,\n",
       "        4.0548e-01,  5.3596e-03,  5.5274e-02, -1.2238e+00, -4.8912e-02,\n",
       "       -3.0511e-01,  4.4473e-01, -3.3826e-01, -2.2133e-01, -1.3214e+00,\n",
       "       -6.4761e-01, -4.4021e-01, -1.4910e+00, -2.2495e-02,  6.0346e-02,\n",
       "        1.4833e-01,  4.4162e-01,  7.9787e-01, -2.8076e-01, -2.9400e-02,\n",
       "       -1.5656e-01, -1.2650e-01, -5.6968e-01,  1.5374e-03,  6.6600e-01],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "embeddings_index[\"book\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
